# Import packages
#install.packages("corrplot")
library(dplyr)
library(data.table)
library(ggplot2)
library(pastecs)
Attaching package: ‘pastecs’
The following objects are masked from ‘package:data.table’:
first, last
The following objects are masked from ‘package:dplyr’:
first, last
library(corrplot)
#library(ggthemes) # For appearance of plot like theme in ggplot2
# Session setup ----
# remove(list=ls())
# setwd("C:\\Users\\sunil\\Downloads\\College\\DAV\\Project")
# Print large numbers in full instead of switching to exponential notation.
options(scipen = 999)
# Load the source files ----
# The indicator file is read with its first four lines skipped, so the header
# row is taken from the line that follows them.
nepal_dt <- read.csv(
  file = "Source Dataset-API_NPL_DS2.csv",
  skip = 4,
  header = TRUE,
  stringsAsFactors = FALSE
)
meta_country <- read.csv(
  file = "MetaData_Country.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
meta_indictr <- read.csv(
  file = "MetaData_Indicator.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Echo the three tables for a quick visual check.
nepal_dt
meta_country
meta_indictr
Data Preparation: Preparing data after the import
# Start the working table with every indicator whose name or code contains
# "tax" (both are lower-cased first, so the match ignores case).
tax_in_name <- grepl("tax", tolower(nepal_dt[["IndicatorName"]]), fixed = TRUE)
tax_in_code <- grepl("tax", tolower(nepal_dt[["IndicatorCode"]]), fixed = TRUE)
temp_df <- filter(nepal_dt, tax_in_name | tax_in_code)
nepal_df <- temp_df
nepal_df
# Rows x columns of the selection so far.
dim(nepal_df)
[1] 53 66
# Append every indicator whose name or code contains "gdp" (both are
# lower-cased first, so the match ignores case).
temp_df <- filter(
  nepal_dt,
  grepl("gdp", tolower(IndicatorName), fixed = TRUE) |
    grepl("gdp", tolower(IndicatorCode), fixed = TRUE)
)
# An indicator can match more than one keyword (for example a tax indicator
# expressed as "% of GDP"). unique() keeps a single copy of such rows so the
# transposed table does not end up with repeated indicator columns.
nepal_df <- unique(rbind(nepal_df, temp_df))
nepal_df
# Rows x columns of the selection so far.
dim(nepal_df)
[1] 143 66
# Append every indicator whose name or code contains "employment" (both are
# lower-cased first, so the match ignores case).
temp_df <- filter(
  nepal_dt,
  grepl("employment", tolower(IndicatorName), fixed = TRUE) |
    grepl("employment", tolower(IndicatorCode), fixed = TRUE)
)
# Rows already present in nepal_df (indicators that matched an earlier keyword
# as well) are collapsed to a single copy, so no indicator is carried twice
# into the transposed table.
nepal_df <- unique(rbind(nepal_df, temp_df))
nepal_df
# Discard the first two columns; only the remaining ones are transposed.
nepal_df <- nepal_df[-(1:2)]
nepal_df
# unique(nepal_df$IndicatorName)
#table(tolower(nepal_df$IndicatorName))
# Flip the table so that every former column becomes a row.
# df_t <- (t(nepal_df))
df_t <- transpose(nepal_df)
# Carry the labels across: the old column names label the rows, and the old
# row names act as temporary column labels until the header is promoted below.
row.names(df_t) <- names(nepal_df)
names(df_t) <- row.names(nepal_df)
View(df_t)
df_t[0, ]
# Promote the SECOND row of the transposed table to be the column names,
# replacing the temporary labels assigned above.
colnames(df_t) <- unlist(df_t[2, ], use.names = FALSE)
# The first two rows are no longer needed once row 2 has become the header.
df_t <- df_t[-(1:2), ]
nepal_df <- df_t
View(nepal_df)
# Promote the row names to a leading column called YEAR.
#setDT(df_t, keep.rownames = TRUE)[]
nepal_df <- cbind(YEAR = rownames(nepal_df), nepal_df)
# Strip the literal character 'X' from the YEAR values.
nepal_df$YEAR <- gsub("X", "", as.character(nepal_df$YEAR), fixed = TRUE)
nepal_df
# Number of columns, YEAR included.
ncol(nepal_df)
[1] 243
nepal_df
# Converting columns to numeric types
#nepal_df$TM.TAX.MRCH.WM.AR.ZS = as.numeric(as.character(nepal_df$TM.TAX.MRCH.WM.AR.ZS))
#nepal_df$NY.GDP.PETR.RT.ZS = as.numeric(as.character(nepal_df$NY.GDP.PETR.RT.ZS))
# lapply() always yields exactly one converted vector per column, and the
# empty-bracket assignment writes them back in place, so the data frame keeps
# its shape and column names whatever the number of rows or columns.
# Values that cannot be parsed as numbers become NA (with a warning).
nepal_df[] <- lapply(nepal_df, as.numeric)
# Confirm the resulting type of every column.
sapply(nepal_df, class)
YEAR TM.TAX.MRCH.WM.AR.ZS TM.TAX.MRCH.IP.ZS NY.TAX.NIND.KN
"numeric" "numeric" "numeric" "numeric"
TM.TAX.TCOM.BC.ZS TM.TAX.MANF.BC.ZS GC.TAX.INTT.RV.ZS TM.TAX.MRCH.WM.FN.ZS
"numeric" "numeric" "numeric" "numeric"
TM.TAX.MRCH.SM.AR.ZS TM.TAX.TCOM.IP.ZS TM.TAX.MANF.IP.ZS IC.TAX.GIFT.ZS
"numeric" "numeric" "numeric" "numeric"
GC.TAX.TOTL.GD.ZS GC.TAX.GSRV.VA.ZS IC.TAX.LABR.CP.ZS GC.TAX.YPKG.CN
"numeric" "numeric" "numeric" "numeric"
TM.TAX.MRCH.BR.ZS NY.TAX.NIND.CN TM.TAX.MRCH.SR.ZS IC.TAX.OTHR.CP.ZS
"numeric" "numeric" "numeric" "numeric"
GC.TAX.YPKG.ZS GC.TAX.IMPT.ZS GC.TAX.OTHR.CN GC.TAX.IMPT.CN
"numeric" "numeric" "numeric" "numeric"
TM.TAX.TCOM.WM.AR.ZS TM.TAX.MANF.WM.AR.ZS IC.TAX.PAYM GC.TAX.EXPT.CN
"numeric" "numeric" "numeric" "numeric"
IC.TAX.TOTL.CP.ZS IC.FRM.INFM.ZS GC.TAX.GSRV.CN GC.TAX.INTT.CN
"numeric" "numeric" "numeric" "numeric"
TM.TAX.TCOM.WM.FN.ZS TM.TAX.MANF.WM.FN.ZS TM.TAX.MRCH.SM.FN.ZS TM.TAX.TCOM.SM.AR.ZS
"numeric" "numeric" "numeric" "numeric"
TM.TAX.MANF.SM.AR.ZS IC.FRM.METG.ZS GC.TAX.GSRV.RV.ZS TM.TAX.MRCH.BC.ZS
"numeric" "numeric" "numeric" "numeric"
NY.TAX.NIND.CD TM.TAX.TCOM.SM.FN.ZS TM.TAX.MANF.SM.FN.ZS IC.TAX.METG
"numeric" "numeric" "numeric" "numeric"
GC.TAX.YPKG.RV.ZS IC.TAX.DURS GC.TAX.TOTL.CN TM.TAX.TCOM.BR.ZS
"numeric" "numeric" "numeric" "numeric"
TM.TAX.MANF.BR.ZS TM.TAX.TCOM.SR.ZS TM.TAX.MANF.SR.ZS IC.TAX.PRFT.CP.ZS
"numeric" "numeric" "numeric" "numeric"
GC.TAX.EXPT.ZS GC.TAX.OTHR.RV.ZS TG.VAL.TOTL.GD.ZS NY.GDP.MKTP.KD
"numeric" "numeric" "numeric" "numeric"
NY.GDP.COAL.RT.ZS NY.GDP.PCAP.PP.KD NY.GDP.MINR.RT.ZS NY.GDP.MKTP.KN
"numeric" "numeric" "numeric" "numeric"
NY.GDP.DEFL.KD.ZG.AD NV.SRV.TOTL.ZS ER.GDP.FWTL.M3.KD BX.TRF.PWKR.DT.GD.ZS
"numeric" "numeric" "numeric" "numeric"
SL.GDP.PCAP.EM.KD SE.XPD.TERT.PC.ZS NY.GDS.TOTL.ZS NY.GDP.MKTP.KD.ZG
"numeric" "numeric" "numeric" "numeric"
NY.GDP.DEFL.KD.ZG SH.XPD.CHEX.GD.ZS SE.XPD.PRIM.PC.ZS NY.GDP.PETR.RT.ZS
"numeric" "numeric" "numeric" "numeric"
NY.GDP.MKTP.CD NE.DAB.TOTL.ZS SH.XPD.GHED.GD.ZS SE.XPD.TOTL.GD.ZS
"numeric" "numeric" "numeric" "numeric"
PA.NUS.PPPC.RF NY.GDP.MKTP.PP.KD NY.GDP.DEFL.ZS.AD NE.GDI.TOTL.ZS
"numeric" "numeric" "numeric" "numeric"
GC.TAX.TOTL.GD.ZS FS.AST.DOMS.GD.ZS FM.AST.PRVT.GD.ZS EN.ATM.CO2E.KD.GD
"numeric" "numeric" "numeric" "numeric"
NY.GDP.PCAP.PP.CD NY.GDP.FRST.RT.ZS NE.GDI.FTOT.ZS SE.XPD.SECO.PC.ZS
"numeric" "numeric" "numeric" "numeric"
NY.GDP.MKTP.CN.AD NV.IND.MANF.ZS NE.TRD.GNFS.ZS GC.REV.XGRT.GD.ZS
"numeric" "numeric" "numeric" "numeric"
GB.XPD.RSDV.GD.ZS EG.USE.COMM.GD.PP.KD GC.NLD.TOTL.GD.ZS BN.CAB.XOKA.GD.ZS
"numeric" "numeric" "numeric" "numeric"
BG.GSR.NFSV.GD.ZS NE.CON.PRVT.ZS GC.LBL.TOTL.GD.ZS FS.AST.PRVT.GD.ZS
"numeric" "numeric" "numeric" "numeric"
BM.KLT.DINV.WD.GD.ZS NY.GDP.PCAP.KD NY.GDP.FCST.CN FS.AST.CGOV.GD.ZS
"numeric" "numeric" "numeric" "numeric"
EN.ATM.CO2E.PP.GD EG.GDP.PUSE.KO.PP.KD EG.EGY.PRIM.PP.KD GC.NFN.TOTL.GD.ZS
"numeric" "numeric" "numeric" "numeric"
FM.LBL.BMNY.GD.ZS NY.GDP.PCAP.KD.ZG NY.GDP.FCST.KD NY.GDP.TOTL.RT.ZS
"numeric" "numeric" "numeric" "numeric"
NY.GDP.MKTP.CN NE.RSB.GNFS.ZS MS.MIL.XPND.GD.ZS NY.GDP.NGAS.RT.ZS
"numeric" "numeric" "numeric" "numeric"
NY.GDP.DISC.CN NV.IND.TOTL.ZS NE.GDI.FPRV.ZS GC.DOD.TOTL.GD.ZS
"numeric" "numeric" "numeric" "numeric"
FS.AST.DOMO.GD.ZS EN.ATM.CO2E.PP.GD.KD BX.KLT.DINV.WD.GD.ZS NY.GDP.PCAP.KN
"numeric" "numeric" "numeric" "numeric"
NY.GDP.FCST.KN NE.IMP.GNFS.ZS NY.GNS.ICTR.ZS NY.GDP.PCAP.CD
"numeric" "numeric" "numeric" "numeric"
NY.GDP.DISC.KN NV.AGR.TOTL.ZS CM.MKT.TRAD.GD.ZS CM.MKT.LCAP.GD.ZS
"numeric" "numeric" "numeric" "numeric"
PA.NUS.PPP NY.GDP.MKTP.PP.CD NY.GDP.DEFL.ZS NE.EXP.GNFS.ZS
"numeric" "numeric" "numeric" "numeric"
NY.GDP.PCAP.CN NY.GDP.FCST.CD NE.CON.TOTL.ZS GC.AST.TOTL.GD.ZS
"numeric" "numeric" "numeric" "numeric"
EG.GDP.PUSE.KO.PP NE.CON.GOVT.ZS GC.XPN.TOTL.GD.ZS FD.AST.PRVT.GD.ZS
"numeric" "numeric" "numeric" "numeric"
SL.UEM.NEET.ZS SL.UEM.1524.FE.ZS SL.SRV.EMPL.ZS SL.FAM.WORK.ZS
"numeric" "numeric" "numeric" "numeric"
SL.EMP.TOTL.SP.FE.ZS SL.AGR.EMPL.MA.ZS per_lm_alllm.cov_q5_tot SL.UEM.INTM.MA.ZS
"numeric" "numeric" "numeric" "numeric"
SL.TLF.PART.ZS SL.TLF.0714.WK.MA.ZS SL.SRV.0714.MA.ZS SL.FAM.0714.MA.ZS
"numeric" "numeric" "numeric" "numeric"
SL.EMP.SELF.MA.ZS SL.AGR.0714.FE.ZS per_lm_alllm.cov_q1_tot SL.UEM.TOTL.FE.ZS
"numeric" "numeric" "numeric" "numeric"
SL.UEM.1524.MA.ZS SL.TLF.0714.MA.ZS SL.IND.EMPL.FE.ZS SL.EMP.TOTL.SP.MA.ZS
"numeric" "numeric" "numeric" "numeric"
SL.EMP.1524.SP.FE.NE.ZS SL.UEM.TOTL.FE.NE.ZS SL.UEM.1524.MA.NE.ZS SL.TLF.0714.FE.ZS
"numeric" "numeric" "numeric" "numeric"
SL.EMP.TOTL.SP.MA.NE.ZS SL.AGR.EMPL.ZS SL.UEM.INTM.ZS SL.SRV.0714.ZS
"numeric" "numeric" "numeric" "numeric"
SL.FAM.0714.ZS SL.EMP.SELF.ZS SL.AGR.0714.MA.ZS per_lm_alllm.cov_q2_tot
"numeric" "numeric" "numeric" "numeric"
SL.UEM.TOTL.MA.ZS SL.UEM.1524.ZS SL.TLF.0714.SW.FE.ZS SL.IND.EMPL.ZS
"numeric" "numeric" "numeric" "numeric"
SL.EMP.TOTL.SP.ZS SL.EMP.1524.SP.MA.NE.ZS SL.UEM.INTM.FE.ZS SL.TLF.PART.MA.ZS
"numeric" "numeric" "numeric" "numeric"
SL.SRV.0714.FE.ZS SL.FAM.0714.FE.ZS SL.EMP.SELF.FE.ZS per_lm_alllm.cov_pop_tot
"numeric" "numeric" "numeric" "numeric"
SL.UEM.NEET.MA.ZS SL.UEM.1524.FE.NE.ZS SL.TLF.0714.ZS SL.SRV.EMPL.MA.ZS
"numeric" "numeric" "numeric" "numeric"
SL.FAM.WORK.MA.ZS SL.EMP.TOTL.SP.FE.NE.ZS SL.AGR.EMPL.FE.ZS per_lm_alllm.cov_q4_tot
"numeric" "numeric" "numeric" "numeric"
SL.WAG.0714.MA.ZS SL.UEM.BASC.FE.ZS SL.TLF.0714.SW.ZS SL.SLF.0714.FE.ZS
"numeric" "numeric" "numeric" "numeric"
SL.EMP.WORK.FE.ZS SL.EMP.MPYR.FE.ZS SL.WAG.0714.ZS SL.UEM.BASC.MA.ZS
"numeric" "numeric" "numeric" "numeric"
SL.SLF.0714.MA.ZS SL.EMP.WORK.MA.ZS SL.EMP.MPYR.MA.ZS per_lm_alllm.adq_pop_tot
"numeric" "numeric" "numeric" "numeric"
SL.UEM.NEET.FE.ZS SL.TLF.0714.WK.ZS SL.SRV.EMPL.FE.ZS SL.FAM.WORK.FE.ZS
"numeric" "numeric" "numeric" "numeric"
SL.EMP.SMGT.FE.ZS SL.AGR.0714.ZS per_lm_alllm.cov_q3_tot SL.UEM.TOTL.NE.ZS
"numeric" "numeric" "numeric" "numeric"
SL.UEM.ADVN.FE.ZS SL.MNF.0714.FE.ZS SL.EMP.VULN.FE.ZS SL.EMP.1524.SP.MA.ZS
"numeric" "numeric" "numeric" "numeric"
SL.UEM.BASC.ZS SL.TLF.PART.FE.ZS SL.TLF.0714.WK.FE.ZS SL.SLF.0714.ZS
"numeric" "numeric" "numeric" "numeric"
SL.EMP.WORK.ZS SL.EMP.MPYR.ZS per_lm_alllm.ben_q1_tot SL.UEM.TOTL.ZS
"numeric" "numeric" "numeric" "numeric"
SL.UEM.ADVN.MA.ZS SL.TLF.0714.SW.MA.ZS SL.MNF.0714.MA.ZS SL.EMP.VULN.MA.ZS
"numeric" "numeric" "numeric" "numeric"
SL.EMP.1524.SP.NE.ZS SL.UEM.TOTL.MA.NE.ZS SL.UEM.1524.NE.ZS SL.IND.EMPL.MA.ZS
"numeric" "numeric" "numeric" "numeric"
SL.EMP.TOTL.SP.NE.ZS SL.EMP.1524.SP.FE.ZS SL.WAG.0714.FE.ZS SL.UEM.ADVN.ZS
"numeric" "numeric" "numeric" "numeric"
SL.MNF.0714.ZS SL.EMP.VULN.ZS SL.EMP.1524.SP.ZS
"numeric" "numeric" "numeric"
# Replace NA values with 0
#nepal_df["TM.TAX.MRCH.WM.AR.ZS"][is.na(nepal_df["TM.TAX.MRCH.WM.AR.ZS"])] <- 0
#nepal_df["NY.GDP.PETR.RT.ZS"][is.na(nepal_df["NY.GDP.PETR.RT.ZS"])] <- 0
# Zero-fill every NA cell of the table in one pass, using the is.na() mask.
# NOTE(review): after this step a missing observation is indistinguishable
# from a genuine 0 in the summaries, correlations and plots computed later -
# confirm that this is intended.
nepal_df <- replace(nepal_df, is.na(nepal_df), 0)
nepal_df
# Viewing the data after preparing it.
View(nepal_df)
Parameter Selection:
## Sample parameters selection to achieve project objective.
# GC.TAX.GSRV.VA.ZS -> Taxes on goods and services
# GC.TAX.GSRV.CN
# GC.TAX.TOTL.GD.ZS -> Tax revenue (% of GDP)
# IC.TAX.LABR.CP.ZS -> Labor tax and contributions (% of commercial profits) | Labor tax and contributions is the amount of taxes and mandatory contributions on labor paid by the business.
# GC.TAX.YPKG.CN -> Taxes on income, profits and capital gains (current LCU)
# GC.TAX.IMPT.ZS -> Customs and other import duties (% of tax revenue)
# GC.TAX.EXPT.CN -> Taxes on exports (current LCU)
# IC.TAX.TOTL.CP.ZS -> Total tax and contribution rate (% of profit)
# NY.GDP.MKTP.KD -> GDP (constant 2015 US$)
# NY.GDP.MKTP.KD.ZG -> GDP growth (annual %)
# SL.IND.EMPL.ZS -> Employment in industry (% of total employment) (modeled ILO estimate)
# SL.IND.EMPL.FE.ZS -> Employment in industry, female (% of female employment) (modeled ILO estimate)
# SL.IND.EMPL.MA.ZS -> Employment in industry, male (% of male employment) (modeled ILO estimate)
# SL.AGR.EMPL.ZS -> Employment in agriculture (% of total employment) (modeled ILO estimate)
# SL.AGR.EMPL.FE.ZS -> Employment in agriculture, female (% of female employment) (modeled ILO estimate)
# SL.AGR.EMPL.MA.ZS -> Employment in agriculture, male (% of male employment) (modeled ILO estimate)
## Sample parameter selection to achieve project objective.
# GC.TAX.GSRV.VA.ZS, NY.GDP.MKTP.KD 0.8481471
# GC.TAX.GSRV.VA.ZS, SL.IND.EMPL.ZS 0.8880489
# GC.TAX.GSRV.VA.ZS, SL.IND.EMPL.FE.ZS 0.8928028
# GC.TAX.GSRV.VA.ZS, SL.IND.EMPL.MA.ZS 0.8939309
# GC.TAX.GSRV.VA.ZS, SL.AGR.EMPL.ZS 0.8268747
# GC.TAX.GSRV.VA.ZS, SL.AGR.EMPL.FE.ZS 0.8333567
# GC.TAX.GSRV.VA.ZS, SL.AGR.EMPL.MA.ZS 0.8062022
# GC.TAX.INTT.RV.ZS, SL.IND.EMPL.ZS 0.727295
# GC.TAX.INTT.RV.ZS, SL.IND.EMPL.FE.ZS 0.7059692
# GC.TAX.INTT.RV.ZS, SL.IND.EMPL.MA.ZS 0.7179946
# GC.TAX.TOTL.GD.ZS, SL.IND.EMPL.ZS 0.893035
# GC.TAX.TOTL.GD.ZS, SL.IND.EMPL.FE.ZS 0.8984195
# GC.TAX.TOTL.GD.ZS, SL.IND.EMPL.MA.ZS 0.8992892
# IC.TAX.LABR.CP.ZS
# GC.TAX.YPKG.CN
# GC.TAX.IMPT.ZS
# GC.TAX.EXPT.CN
# IC.TAX.TOTL.CP.ZS
## Sample parameters selection to achieve project objective.
# Columns kept for the analysis: YEAR plus the chosen tax, GDP and employment
# indicators. all_of() raises an error if any listed column is missing.
selected_indicators <- c(
  "YEAR",
  "GC.TAX.GSRV.VA.ZS",
  "GC.TAX.GSRV.CN",
  "GC.TAX.TOTL.GD.ZS",
  "IC.TAX.LABR.CP.ZS",
  "GC.TAX.YPKG.CN",
  "GC.TAX.IMPT.ZS",
  "GC.TAX.EXPT.CN",
  "IC.TAX.TOTL.CP.ZS",
  "NY.GDP.MKTP.KD",
  "NY.GDP.MKTP.KD.ZG",
  "SL.IND.EMPL.ZS",
  "SL.IND.EMPL.FE.ZS",
  "SL.IND.EMPL.MA.ZS",
  "SL.AGR.EMPL.ZS",
  "SL.AGR.EMPL.FE.ZS",
  "SL.AGR.EMPL.MA.ZS"
)
nepal_df <- select(nepal_df, all_of(selected_indicators))
nepal_df
Data Quality: Checking the data
# Per-column minimum, quartiles, median, mean and maximum.
summary(nepal_df)
YEAR GC.TAX.GSRV.VA.ZS GC.TAX.GSRV.CN GC.TAX.TOTL.GD.ZS IC.TAX.LABR.CP.ZS
Min. :1960 Min. : 0.000 Min. : 0 Min. : 0.000 Min. : 0.000
1st Qu.:1975 1st Qu.: 0.000 1st Qu.: 0 1st Qu.: 0.000 1st Qu.: 0.000
Median :1990 Median : 3.186 Median : 1743600000 Median : 3.242 Median : 0.000
Mean :1990 Mean : 4.652 Mean : 44124665452 Mean : 5.477 Mean : 3.113
3rd Qu.:2006 3rd Qu.: 7.953 3rd Qu.: 26728675000 3rd Qu.: 8.983 3rd Qu.: 0.000
Max. :2021 Max. :16.909 Max. :424000000000 Max. :19.809 Max. :26.200
GC.TAX.YPKG.CN GC.TAX.IMPT.ZS GC.TAX.EXPT.CN IC.TAX.TOTL.CP.ZS NY.GDP.MKTP.KD
Min. : 0 Min. : 0.000 Min. : 0 Min. : 0.000 Min. : 3312006963
1st Qu.: 0 1st Qu.: 0.000 1st Qu.: 0 1st Qu.: 0.000 1st Qu.: 4680519798
Median : 384000000 Median : 8.884 Median : 16100000 Median : 0.000 Median : 8516316247
Mean : 21067623604 Mean :13.259 Mean : 212167572 Mean : 7.863 Mean :11498688828
3rd Qu.: 8722050000 3rd Qu.:26.993 3rd Qu.: 366500000 3rd Qu.: 0.000 3rd Qu.:16270064206
Max. :213000000000 Max. :36.967 Max. :1069880000 Max. :41.800 Max. :31149050463
NY.GDP.MKTP.KD.ZG SL.IND.EMPL.ZS SL.IND.EMPL.FE.ZS SL.IND.EMPL.MA.ZS SL.AGR.EMPL.ZS SL.AGR.EMPL.FE.ZS
Min. :-2.977 Min. : 0.000 Min. :0.000 Min. : 0.000 Min. : 0.00 Min. : 0.00
1st Qu.: 2.025 1st Qu.: 0.000 1st Qu.:0.000 1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 0.00
Median : 4.041 Median : 0.000 Median :0.000 Median : 0.000 Median : 0.00 Median : 0.00
Mean : 3.720 Mean : 4.974 Mean :2.703 Mean : 7.345 Mean :34.22 Mean :38.54
3rd Qu.: 5.258 3rd Qu.:11.735 3rd Qu.:6.260 3rd Qu.:16.938 3rd Qu.:72.10 3rd Qu.:81.94
Max. : 9.681 Max. :15.110 Max. :8.620 Max. :23.310 Max. :82.33 Max. :90.39
SL.AGR.EMPL.MA.ZS
Min. : 0.00
1st Qu.: 0.00
Median : 0.00
Mean :29.81
3rd Qu.:62.73
Max. :74.79
Correlation Analysis: Exploring relationship between employment, tax and GDP. Understanding what drives economic activity.
# Pairwise correlations between all columns of the data frame, computed once
# and reused for both the table view and the plot.
# cor(nepal_df$TM.TAX.MRCH.WM.AR.ZS, nepal_df$NY.GDP.PETR.RT.ZS)
# cor(nepal_df$GC.TAX.TOTL.GD.ZS, nepal_df$SL.IND.EMPL.FE.ZS)
indicator_cor <- cor(nepal_df)
View(indicator_cor, title = "cor(nepal_df)")
# Correlation matrix plot (lower triangle only)
corrplot(indicator_cor, type = "lower")
# Variance of the goods-and-services tax share.
var(nepal_df$GC.TAX.GSRV.VA.ZS)
[1] 26.21113
# SL.IND.EMPL.ZS NY.GDP.MKTP.KD
Time series analysis: Trends/patterns in the data over time
# autoregressive integrated moving average (ARIMA) - need to look at it
# GDP = Consumption + Investment + Government spending + Net exports
# Line-and-point chart of GC.TAX.GSRV.VA.ZS against YEAR.
# Columns are named bare inside aes() so ggplot2 looks them up in the `data`
# argument; writing nepal_df$... there bypasses that lookup and is discouraged
# by ggplot2.
p <- ggplot(nepal_df, aes(x = YEAR, y = GC.TAX.GSRV.VA.ZS)) +
  geom_line(color = "steelblue") +
  geom_point() +
  xlab("YEAR") +
  ylab("Taxes on goods and services(%)") +
  ggtitle("Percent increase on tax on goods & services each year")
#scale_x_date(limit=c(as.Date("1960-01-01"),as.Date("2022-12-30"))) +
p
# Check tax and gdp over time
# coeff is assigned here but is not referenced by the plot below.
coeff <- 10
tax_color <- "black"
gdp_color <- "steelblue"
# Both series are drawn against YEAR. Columns are named bare inside aes() so
# they are looked up in `data`, and line thickness uses `linewidth` (the `size`
# aesthetic for lines is deprecated since ggplot2 3.4.0).
ggplot(nepal_df, aes(x = YEAR)) +
  geom_line(aes(y = GC.TAX.GSRV.CN), linewidth = 0.5, color = tax_color) +
  geom_line(aes(y = NY.GDP.MKTP.KD), linewidth = 0.5, color = gdp_color) +
  geom_point(aes(y = GC.TAX.GSRV.CN), size = 2, color = tax_color) +
  geom_point(aes(y = NY.GDP.MKTP.KD), size = 2, color = gdp_color) +
  scale_y_continuous(
    # First axis
    name = "Taxes on goods and services (current LCU)",
    # Second axis: the transformation is the identity (~ . * 1), so it shows
    # the same numeric scale as the first axis under a different title.
    sec.axis = sec_axis(~ . * 1, name = "GDP (constant 2015 US$)")
  ) +
  # theme_ipsum() +
  scale_x_continuous(
    name = "YEAR"
  ) +
  theme(
    axis.title.y = element_text(color = tax_color, size = 13),
    axis.title.y.right = element_text(color = gdp_color, size = 13)
  ) +
  ggtitle("Tax and GDP over time") +
  theme(plot.title = element_text(hjust = 0.5)) # Title to be at center
Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
# Constant kept from the previous plot; the employment plot that follows does
# not reference it.
coeff <- 10
Warning messages:
1: In readChar(file, size, TRUE) : truncating string with embedded nuls
2: In readChar(file, size, TRUE) : truncating string with embedded nuls
3: In readChar(file, size, TRUE) : truncating string with embedded nuls
4: In readChar(file, size, TRUE) : truncating string with embedded nuls
5: In readChar(file, size, TRUE) : truncating string with embedded nuls
6: In readChar(file, size, TRUE) : truncating string with embedded nuls
# Line colours: tax_color is used for the industry series and gdp_color for
# the agriculture series in the plot below.
tax_color <- "black"
gdp_color <- "steelblue"
# Both series are drawn against YEAR. Columns are named bare inside aes() so
# they are looked up in `data`, and line thickness uses `linewidth` (the `size`
# aesthetic for lines is deprecated since ggplot2 3.4.0).
ggplot(nepal_df, aes(x = YEAR)) +
  geom_line(aes(y = SL.IND.EMPL.ZS), linewidth = 0.5, color = tax_color) +
  geom_line(aes(y = SL.AGR.EMPL.ZS), linewidth = 0.5, color = gdp_color) +
  geom_point(aes(y = SL.IND.EMPL.ZS), size = 2, color = tax_color) +
  geom_point(aes(y = SL.AGR.EMPL.ZS), size = 2, color = gdp_color) +
  scale_y_continuous(
    # First axis
    name = "Employment in industry (% of total employment)",
    # Second axis: the transformation is the identity (~ . * 1), so it shows
    # the same numeric scale as the first axis under a different title.
    sec.axis = sec_axis(
      ~ . * 1,
      name = "Employment in agriculture (% of total employment)"
    )
  ) +
  # theme_ipsum() +
  scale_x_continuous(
    name = "YEAR"
  ) +
  theme(
    axis.title.y = element_text(color = tax_color, size = 13),
    axis.title.y.right = element_text(color = gdp_color, size = 13)
  ) +
  ggtitle("Employment in industry & agriculture over time") +
  theme(plot.title = element_text(hjust = 0.5)) # Title to be at center
# Scatter plot of two columns with a smoothed trend curve on top.
#
# data     data frame holding both columns
# x_col    name of the column mapped to the x axis (string)
# y_col    name of the column mapped to the y axis (string)
# x_label  x-axis label
# y_label  y-axis label
# title    plot title
#
# Returns the ggplot object; when called at top level it is printed.
plot_smoothed_scatter <- function(data, x_col, y_col, x_label, y_label, title) {
  ggplot(data, aes(x = .data[[x_col]], y = .data[[y_col]])) +
    geom_point() +
    # The smoother is a LOESS curve (local regression), not a straight
    # least-squares line. It is what geom_smooth() chooses by default for
    # fewer than 1,000 observations; naming method and formula explicitly
    # avoids the "using method = ..." message on every plot.
    geom_smooth(method = "loess", formula = y ~ x) +
    xlab(x_label) +
    ylab(y_label) +
    ggtitle(title)
}
# GDP against taxes on goods and services (current LCU)
plot_smoothed_scatter(
  nepal_df, "GC.TAX.GSRV.CN", "NY.GDP.MKTP.KD",
  x_label = "Taxes on goods and services (current LCU)",
  y_label = "GDP (constant 2015 US$)",
  title = "Regression: GDP x taxes on goods & services"
)
# Checking GDP growth on every tax % increase
# with trend line (smoothed curve)
plot_smoothed_scatter(
  nepal_df, "GC.TAX.GSRV.VA.ZS", "NY.GDP.MKTP.KD",
  x_label = "Taxes on goods and services (% value added of industry and services)",
  y_label = "GDP (constant 2015 US$)",
  title = "Regression: GDP x taxes on goods & services"
)
# Taxes on goods and services against employment in industry
plot_smoothed_scatter(
  nepal_df, "SL.IND.EMPL.ZS", "GC.TAX.GSRV.VA.ZS",
  x_label = "Employment in industry (% of total employment)",
  y_label = "Taxes on goods and services (% value added of industry and services)",
  title = "Regression: Tax on goods & services X Employment in industry"
)
# Employment in agriculture against taxes on goods and services
plot_smoothed_scatter(
  nepal_df, "GC.TAX.GSRV.VA.ZS", "SL.AGR.EMPL.ZS",
  x_label = "Taxes on goods and services (% value added of industry and services)",
  y_label = "Employment in agriculture (% of total employment)",
  title = "Regression: Tax on goods & services X Employment in agriculture"
)
# Employment in industry against customs and import duties
plot_smoothed_scatter(
  nepal_df, "GC.TAX.IMPT.ZS", "SL.IND.EMPL.ZS",
  x_label = "Customs and other import duties (% of tax revenue)",
  y_label = "Employment in industry (% of total employment)",
  title = "Regression: Customs Import duties X Employment in industry"
)
# Employment in agriculture against customs and import duties
plot_smoothed_scatter(
  nepal_df, "GC.TAX.IMPT.ZS", "SL.AGR.EMPL.ZS",
  x_label = "Customs and other import duties (% of tax revenue)",
  y_label = "Employment in agriculture (% of total employment)",
  title = "Regression: Customs Import duties X Employment in agriculture"
)
# NOTE(review): this call repeats the import-duties / employment-in-industry
# plot drawn two calls earlier with identical arguments - confirm whether the
# second copy is still wanted.
plot_smoothed_scatter(
  nepal_df, "GC.TAX.IMPT.ZS", "SL.IND.EMPL.ZS",
  x_label = "Customs and other import duties (% of tax revenue)",
  y_label = "Employment in industry (% of total employment)",
  title = "Regression: Customs Import duties X Employment in industry"
)
# Print the GC.TAX.EXPT.CN column for inspection.
nepal_df$GC.TAX.EXPT.CN
[1] 0 0 0 0 0 0 0 0 0 0
[11] 0 0 0 0 0 0 0 0 0 0
[21] 0 0 0 0 0 0 0 0 0 0
[31] 32200000 78000000 115000000 141000000 427000000 332000000 150000000 168000000 217000000 378000000
[41] 432000000 493000000 917000000 855600000 527100000 697900000 625284000 698600000 445600000 793800000
[51] 915461000 292395000 861574000 439097000 1069880000 314849716 159554771 125130000 102360000 237634000
[61] 112370000 0
Warning messages:
1: In readChar(file, size, TRUE) : truncating string with embedded nuls
2: In readChar(file, size, TRUE) : truncating string with embedded nuls
3: In readChar(file, size, TRUE) : truncating string with embedded nuls
4: In readChar(file, size, TRUE) : truncating string with embedded nuls
5: In readChar(file, size, TRUE) : truncating string with embedded nuls
6: In readChar(file, size, TRUE) : truncating string with embedded nuls
7: In readChar(file, size, TRUE) : truncating string with embedded nuls
8: In readChar(file, size, TRUE) : truncating string with embedded nuls
9: In readChar(file, size, TRUE) : truncating string with embedded nuls
10: In readChar(file, size, TRUE) : truncating string with embedded nuls
#y = GC.TAX.GSRV.VA.ZS
# One bar per row: bar height is NY.GDP.MKTP.KD, and both the x position and
# the fill colour come from SL.IND.EMPL.ZS. Columns are named bare inside aes()
# so they are looked up in `data`; the fill legend is therefore titled with the
# column name.
ggplot(
  nepal_df,
  aes(x = SL.IND.EMPL.ZS, y = NY.GDP.MKTP.KD, fill = SL.IND.EMPL.ZS)
) +
  geom_bar(stat = "identity", position = "dodge", width = 0.08) +
  #theme_bw() +
  xlab("Employment increase(%)") +
  ylab("GDP (constant 2015 US$)") +
  theme(axis.text.x = element_text(size = 10)) +
  theme(axis.text.y = element_text(size = 10)) +
  ggtitle("Bar plot: GDP vs Employment increase(%)")
# One bar per row: bar height is NY.GDP.MKTP.KD, and both the x position and
# the fill colour come from GC.TAX.GSRV.VA.ZS. Columns are named bare inside
# aes() so they are looked up in `data`; the fill legend is therefore titled
# with the column name.
ggplot(
  nepal_df,
  aes(x = GC.TAX.GSRV.VA.ZS, y = NY.GDP.MKTP.KD, fill = GC.TAX.GSRV.VA.ZS)
) +
  geom_bar(stat = "identity", position = "dodge", width = 0.08) +
  #theme_bw() +
  xlab("Taxes on goods and services(%)") +
  ylab("GDP (constant 2015 US$)") +
  theme(axis.text.x = element_text(size = 10)) +
  theme(axis.text.y = element_text(size = 10)) +
  ggtitle("Bar plot: GDP vs Taxes on goods & services(%)")
#GC.TAX.GSRV.VA.ZS, NY.GDP.MKTP.KD
#a <- filter(nepal_df, YEAR>2012)
#select(a, GC.TAX.GSRV.CN, NY.GDP.MKTP.KD)
# Print the NY.GDP.MKTP.KD column for inspection.
nepal_df$NY.GDP.MKTP.KD
Regression:
# R
# Look up the documentation for scale_x_continuous(); no analysis is done here.
help("scale_x_continuous")
Cluster:
# C
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.
This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.